import urllib.request
import urllib.parse
from lxml import etree

def xml_handle(data):
    """Extract the article's paragraph text nodes from an HTML document.

    Args:
        data: HTML markup to parse (the caller in this file passes the
            decoded page source as a string).

    Returns:
        The result of the XPath query: the ``text()`` nodes of every ``<p>``
        that is a direct child of a ``<div>`` whose ``class`` attribute is
        exactly ``"RichText ztext Post-RichText css-117anjg"``.
    """
    # The class attribute is compared as one exact string, so the query only
    # matches pages that use this precise class list.
    paragraph_text_xpath = (
        '//div[@class="RichText ztext Post-RichText css-117anjg"]/p/text()'
    )
    document = etree.HTML(data)
    return document.xpath(paragraph_text_xpath)

# Module-level name bound to an empty string. No code visible in this file
# reads this global.
i = ""
def data_handle(list):
    """Drop two junk entries and strip the running "N." number from each item.

    The entries at positions 867 and 868 of the input are discarded. Every
    remaining item then has its numbering prefix removed: ``index + 1``
    followed by a dot for items before position 867, and ``index + 2``
    followed by a dot from position 867 onwards (the numbering of those later
    items is shifted by one relative to their position after the removal).

    Args:
        list: Iterable of strings to clean. The parameter name shadows the
            builtin and is kept only so existing callers keep working. The
            argument itself is not modified.

    Returns:
        A new list with the cleaned strings, in input order. Inputs with
        fewer than 869 items are handled without error; whatever part of the
        junk range exists is dropped.
    """
    junk_start = 867
    junk_count = 2

    # Work on a copy so the caller's list is left untouched. The builtin
    # ``list`` is shadowed by the parameter, hence the unpacking form.
    items = [*list]
    # Slice deletion tolerates short inputs, unlike pop() on a missing index.
    del items[junk_start:junk_start + junk_count]

    cleaned = []
    for index, item in enumerate(items):
        number = index + 2 if index >= junk_start else index + 1
        # Remove only the first occurrence so that text such as "1.5" later
        # in the sentence is not damaged.
        cleaned.append(item.replace(str(number) + ".", "", 1))
    return cleaned

def write(data):
    """Send ``data`` as the request body to the local TruthDare write endpoint.

    Args:
        data: Request body handed to ``urllib.request.Request``. The caller
            in this file passes URL-encoded bytes.

    Returns:
        The string "写入成功" when the request completes without raising.
        On any exception the error is printed and ``None`` is returned, so a
        failed write never aborts the caller's loop.
    """
    url = "http://127.0.0.1:5000/TruthDare/Event/write"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36"
    }
    try:
        request = urllib.request.Request(url=url, headers=headers, data=data)
        # The response body is not needed, but the response must still be
        # closed so the underlying connection is released.
        with urllib.request.urlopen(request):
            pass
    except Exception as err:
        # Deliberately best-effort: report the problem and signal failure
        # with None instead of raising.
        print(err)
        return None
    return "写入成功"


if __name__ == "__main__":
    url = "https://zhuanlan.zhihu.com/p/575599203"  # page to scrape
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36"
    }
    Request = urllib.request.Request(url=url, headers=headers)  # build the request
    # Fetch the page. The context manager closes the HTTP response as soon as
    # the body has been read.
    with urllib.request.urlopen(Request) as requise:
        requise_data = requise.read().decode('utf-8')
    # Pull the paragraph texts out of the HTML.
    xml_list = xml_handle(requise_data)
    # Remove junk entries and numbering prefixes.
    handle_list = data_handle(xml_list)
    # De-duplicate while keeping first-seen order. A set would make the
    # submission order vary from run to run.
    handle_list = list(dict.fromkeys(handle_list))
    for k in handle_list:
        # NOTE(review): the original author left an open question here about
        # what a "primary key problem" refers to; nothing in this script
        # resolves it.
        data = urllib.parse.urlencode({"True_words": k}).encode("utf-8")
        write(data)




